# Scrapy settings for example project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#
# Package where new spiders are generated; the same package is the only
# entry in the list of modules searched for spiders.
NEWSPIDER_MODULE = "example.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# User-Agent string the crawler identifies itself with: a product name
# followed by a contact URL in parentheses.
USER_AGENT = (
    "scrapy-redis "
    "(+https://github.com/rolando/scrapy-redis)"
)

# Use the scrapy-redis scheduler, which stores the request queue in Redis.
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
# Keep the Redis queues when the spider closes so a crawl can be resumed.
SCHEDULER_PERSIST = True
# Request queue implementation. Ordering by priority is the default; the
# alternatives are listed below (enable at most one of them).
SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"
# First-in, first-out ordering (FIFO):
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"
# Last-in, first-out ordering (LIFO):
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"

# Duplicate-request filter provided by scrapy-redis.
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"

# Enabled item pipelines. The integer is the run order (lower values run
# first), so items pass through ExamplePipeline before RedisPipeline.
ITEM_PIPELINES = {
    "example.pipelines.ExamplePipeline": 300,
    "scrapy_redis.pipelines.RedisPipeline": 400,
}

# Connection parameters for the Redis database (a local server here).
REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379

# By default RFPDupeFilter only logs the first duplicate request; setting
# DUPEFILTER_DEBUG to True makes it log every duplicate request.
DUPEFILTER_DEBUG = True
# Emit log messages at DEBUG level and above.
LOG_LEVEL = "DEBUG"

# Introduce an artificial delay between requests, so that the crawl is sped
# up by making use of parallelism.
DOWNLOAD_DELAY = 1  # seconds
# Override Scrapy's default request headers. To set proxies or the user
# agent, write your own downloader middlewares instead.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    # Advertise only encodings Scrapy can always decode. "sdch" was listed
    # here before, but Scrapy's HttpCompressionMiddleware cannot decode it, so
    # a server honouring it would return a body the spider cannot read.
    'Accept-Encoding': 'gzip, deflate',
}
